# -*- coding: utf-8 -*-
import scrapy
from xiaohua_spider.items import  XiaohuaSpiderItem
from scrapy.selector import Selector
from scrapy.http import Request

class XiaohuaSpider(scrapy.Spider):
    """Crawl xiaohuar.com list pages, follow each entry to its photo-gallery
    page, and yield one :class:`XiaohuaSpiderItem` per image found there.

    Yields:
        XiaohuaSpiderItem with fields ``folder_name`` (gallery <h1> title),
        ``img_url`` (absolute image URL) and ``img_name`` (file name).
    """

    name = 'xiaohua'
    allowed_domains = ['xiaohuar.com']
    start_urls = ['http://www.xiaohuar.com/list-1-1.html']

    # Detail URLs already scheduled, to avoid re-requesting the same gallery.
    # NOTE: class-level attribute, so the set is shared by every instance of
    # this spider — acceptable here since Scrapy runs one instance per crawl.
    url_set = set()

    def parse(self, response):
        """Parse a list page: queue each unseen gallery page, then queue the
        remaining list pages (Scrapy's duplicate filter drops repeats)."""
        for link in response.xpath('//*[@class="img"]/a'):
            detail_url = link.xpath('.//@href').extract_first()
            # Guard clause: skip missing hrefs and already-seen detail pages.
            if not detail_url or detail_url in self.url_set:
                continue
            self.url_set.add(detail_url)
            # The full photo gallery lives at the same path with /p -> /s.
            gallery_url = detail_url.replace('/p', '/s')
            yield Request(url=gallery_url, callback=self.img_parse)

        # Pagination belongs here, emitted once per list page.  In the
        # original code this loop sat inside img_parse's per-image loop,
        # re-yielding the same 19 requests for every single image.
        for page in range(1, 20):
            yield Request(url=f'http://www.xiaohuar.com/list-1-{page}.html')

    def img_parse(self, response):
        """Parse a gallery page: one item per <img> under div.inner, with the
        page's <h1> text used as the album/folder name."""
        folder_name = response.xpath('//h1/text()').extract_first()
        for src in response.xpath('//div[@class="inner"]/a/img/@src').extract():
            print('图片资源', src)
            item = XiaohuaSpiderItem()
            item['folder_name'] = folder_name
            # urljoin resolves relative paths against the response URL and
            # leaves absolute http/https URLs untouched; the original code
            # prefixed the domain onto any non-https src, which corrupted
            # absolute 'http://...' sources.
            item['img_url'] = response.urljoin(src)
            item['img_name'] = src.split('/')[-1]
            yield item








